# --- Display a grayscale digit image with a highlighted corner region ---
import numpy as np                      # array operations
import matplotlib.pyplot as plt         # image display
from PIL import Image                   # image loading / conversion
import matplotlib.patches as patches    # rectangle overlay

# Load the source image and collapse it to a single luminance band
# ('L' mode = 8-bit grayscale).
img = Image.open("../02_Data/number_7.jpg").convert('L')

# NumPy view of the pixel data so matplotlib can render it directly.
img_array = np.array(img)

# Render the grayscale array on a fresh axis.
fig, ax = plt.subplots()
ax.imshow(img_array, cmap='gray')
ax.set_title("Loaded Grayscale Image with Red Box")  # figure caption
ax.axis('off')  # hide ticks/frame for a cleaner view

# Outline the 10x10 area at the image's top-left corner.
# For images, matplotlib places the origin at the top-left.
rect = patches.Rectangle(
    (0, 0),           # (x, y) anchor: top-left corner
    10,               # width in pixels
    10,               # height in pixels
    linewidth=2,      # border thickness
    edgecolor='red',  # border color
    facecolor='none', # outline only, no fill
)
ax.add_patch(rect)

# Render everything.
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

# Load the digit image as a single-band (grayscale) picture.
img = Image.open("../02_Data/number_7.jpg").convert('L')
img_array = np.array(img)

# Zoom in on the 11x11 patch at the top-left corner so individual
# pixels become visible.
corner = img_array[:11, :11]
plt.imshow(corner, cmap='gray')
plt.title("Top-Left 11x11 Region")
plt.axis('off')  # ticks add nothing at this scale
plt.show()

# Dump the raw intensities for the same patch.
# Values range from 0 (black) to 255 (white).
print("Representation of Top-Left Region in Matrix Format:")
print(corner)
Representation of Top-Left Region in Matrix Format: [[ 3 0 7 0 2 4 0 0 2 5 3] [ 2 0 7 0 3 0 10 8 0 0 0] [ 4 0 0 3 10 0 0 0 0 7 1] [ 0 5 4 3 0 0 15 18 0 0 0] [ 0 6 0 0 5 4 0 0 2 2 0] [ 1 2 0 0 11 11 0 20 7 0 0] [ 0 6 12 0 0 0 18 192 180 98 29] [ 2 0 0 1 11 0 0 196 255 245 236] [ 0 2 1 0 0 0 10 19 183 242 244] [ 0 1 2 0 0 0 4 9 0 29 82] [ 0 0 3 2 0 0 0 0 10 0 0]]
02. Sample Random Generated Array into Images¶
import numpy as np
import matplotlib.pyplot as plt

# Step 1: Ask the user for the image dimensions.
x = int(input("Enter the number of rows (max 200): "))
y = int(input("Enter the number of columns (max 200): "))

# Step 2: Validate the requested size.
# BUGFIX: the error message previously said "between 1 and 1000",
# which contradicted both the prompts and the max-200 check.
if x > 200 or y > 200 or x <= 0 or y <= 0:
    raise ValueError("Both dimensions must be between 1 and 200.")

# Step 3: Generate a random grayscale image: each pixel is an
# independent uint8 intensity in [0, 255].
array = np.random.randint(0, 256, size=(x, y), dtype=np.uint8)

# Step 4: Display the image. vmin/vmax pin the colormap to the full
# 8-bit range so matplotlib does not auto-stretch the contrast.
plt.imshow(array, cmap='gray', vmin=0, vmax=255)
plt.title(f"Random {x}x{y} Grayscale Image")
plt.axis('off')  # hide axis for better presentation
plt.show()
03. Pixels, Resolution, and Band¶
import numpy as np
import matplotlib.pyplot as plt

# Step 1: Create a small synthetic RGB image (4 rows x 6 columns).
# Each pixel holds three uint8 values (Red, Green, Blue) in [0, 255].
height, width = 4, 6
image = np.random.randint(0, 256, size=(height, width, 3), dtype=np.uint8)

# Step 2: Split the image into its three color bands (channels).
red_band = image[:, :, 0]    # Red channel
green_band = image[:, :, 1]  # Green channel
blue_band = image[:, :, 2]   # Blue channel

# Step 3: Show the full RGB image next to each individual band.
fig, axs = plt.subplots(1, 4, figsize=(14, 4))

axs[0].imshow(image)
axs[0].set_title("RGB Image (Resolution: 6x4)")
axs[0].axis('off')

axs[1].imshow(red_band, cmap='Reds')
axs[1].set_title("Red Band")
axs[1].axis('off')

axs[2].imshow(green_band, cmap='Greens')
axs[2].set_title("Green Band")
axs[2].axis('off')

axs[3].imshow(blue_band, cmap='Blues')
axs[3].set_title("Blue Band")
axs[3].axis('off')

plt.tight_layout()
plt.show()
# BUGFIX: a second tight_layout()/show() pair followed here in the
# original cell; after the figure was already rendered it only created
# a stray empty Figure, so the duplicate calls were removed.
<Figure size 640x480 with 0 Axes>
Here's a visual example that demonstrates the concepts of pixels, resolution, and bands:
- The left image is a small, randomly generated RGB image with a resolution of 6×4 (6 pixels wide, 4 pixels tall).
- The next three images show the Red, Green, and Blue bands separately — these are the individual channels that make up the full-color image.
04. Real Image Identification¶
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import matplotlib.patches as patches

# Read the photo in full RGB (no grayscale conversion in this cell).
img = Image.open("../02_Data/UB_BUILDING.jpg")  # Update the path as needed
img_array = np.array(img)  # Height x Width x 3 array

# First figure: just the picture, no axis clutter.
fig, ax = plt.subplots()
ax.imshow(img_array)
ax.axis('off')
plt.show()

# Resolution = (height, width); a single band is enough to read it.
print(f'Resolution: {img_array[:,:,0].shape}')

# Pull out the three color channels.
red_band = img_array[:, :, 0]
green_band = img_array[:, :, 1]
blue_band = img_array[:, :, 2]

# Second figure: original plus one panel per channel.
panels = [
    (img_array, None, "Original RGB Image"),
    (red_band, 'Reds', "Red Band"),
    (green_band, 'Greens', "Green Band"),
    (blue_band, 'Blues', "Blue Band"),
]
fig, axs = plt.subplots(1, 4, figsize=(18, 5))
for axis, (data, cmap, caption) in zip(axs, panels):
    axis.imshow(data, cmap=cmap)
    axis.axis('off')
    axis.set_title(caption)
plt.tight_layout()
plt.show()
Resolution: (150, 150)
from skimage import io, color, filters, exposure
import matplotlib.pyplot as plt
import numpy as np

# ---------------------------------------------------------------
# Load the photo and convert it to grayscale.
# ---------------------------------------------------------------
image_path = "../02_Data/UB_BUILDING.jpg"  # Replace with your own path
image_rgb = io.imread(image_path)
gray_image = color.rgb2gray(image_rgb)  # luminance-preserving conversion

# ---------------------------------------------------------------
# Histogram of grayscale intensities (brightness distribution).
# ---------------------------------------------------------------
hist, hist_centers = exposure.histogram(gray_image)

# ---------------------------------------------------------------
# Thresholding: automatic (Otsu) and fixed (manual) cutoffs.
# ---------------------------------------------------------------
otsu_thresh = filters.threshold_otsu(gray_image)  # minimizes intra-class variance
binary_otsu = gray_image > otsu_thresh

manual_thresh = 0.5  # hand-picked cutoff for comparison
binary_manual = gray_image > manual_thresh

# ---------------------------------------------------------------
# Non-binary background removal: keep the grayscale intensity
# wherever the mask is True, zero elsewhere.
# ---------------------------------------------------------------
masked_otsu = np.where(binary_otsu, gray_image, 0)
masked_manual = np.where(binary_manual, gray_image, 0)

# ---------------------------------------------------------------
# Side-by-side comparison grid (2 rows x 4 columns).
# ---------------------------------------------------------------
fig, axs = plt.subplots(2, 4, figsize=(20, 10))

# Image panels: (grid position, data, colormap, caption).
panels = [
    ((0, 0), image_rgb, None, "Original RGB Image"),
    ((0, 1), gray_image, 'gray', "Grayscale Image"),
    ((1, 0), binary_otsu, 'gray', "Otsu Binary Mask"),
    ((1, 1), binary_manual, 'gray', f"Manual Binary Mask > {manual_thresh}"),
    ((1, 2), masked_otsu, 'gray', "Otsu: Background Removed (Gray Intensity)"),
    ((1, 3), masked_manual, 'gray', "Manual: Background Removed (Gray Intensity)"),
]
for (row, col), data, cmap, caption in panels:
    axs[row, col].imshow(data, cmap=cmap)
    axs[row, col].set_title(caption)
    axs[row, col].axis('off')

# Histogram panel with both thresholds marked.
hist_ax = axs[0, 2]
hist_ax.plot(hist_centers, hist, lw=2)
hist_ax.axvline(otsu_thresh, color='red', linestyle='--', label=f"Otsu: {otsu_thresh:.2f}")
hist_ax.axvline(manual_thresh, color='green', linestyle=':', label=f"Manual: {manual_thresh}")
hist_ax.set_title("Grayscale Histogram")
hist_ax.legend()
hist_ax.grid(True)

# Unused slot in the grid.
axs[0, 3].axis('off')

plt.tight_layout()
plt.show()
✅ Step 1: Load and Convert to Grayscale
We begin by loading a real RGB image (e.g., a photograph of a building). The image is then converted to grayscale using skimage.color.rgb2gray, which preserves luminance instead of simply averaging RGB values.
image_rgb = io.imread(image_path)
gray_image = color.rgb2gray(image_rgb)
✅ Step 2: Explore Intensity Using Histogram
The grayscale image is composed of pixel values between 0 (black) and 1 (white). A histogram is computed to understand how these intensities are distributed—whether the image is generally dark, bright, or balanced.
hist, hist_centers = exposure.histogram(gray_image)
This helps inform our thresholding decisions.
✅ Step 3: Apply Thresholding
🎯 Otsu’s Method
We use Otsu's thresholding, which is a popular method to automatically find a cutoff value that separates the background and foreground by minimizing intra-class variance.
otsu_thresh = filters.threshold_otsu(gray_image)
binary_otsu = gray_image > otsu_thresh
✋ Manual Thresholding
For comparison, we apply a fixed threshold value (e.g., 0.5) to manually segment the image.
manual_thresh = 0.5
binary_manual = gray_image > manual_thresh
✅ Step 4: Remove Background (Non-Binary Masking)
Instead of keeping the image binary, we retain the original grayscale intensity where the mask is True, and set the rest to zero. This effectively removes the background while preserving brightness in the foreground.
masked_otsu = np.where(binary_otsu, gray_image, 0)
masked_manual = np.where(binary_manual, gray_image, 0)
✅ Step 5: Visualize the Results
We compare:
- RGB original image
- Grayscale conversion
- Histogram showing both thresholds
- Binary masks from Otsu and Manual
- Background-removed grayscale images (Otsu and Manual)
This gives a complete picture of how thresholding affects segmentation.
📌 Key Concepts
| Concept | Explanation |
|---|---|
| Grayscale Image | 2D image with brightness intensity (0–1 in skimage) |
| Histogram | Shows pixel intensity distribution |
| Otsu’s Threshold | Automatically chooses threshold by minimizing within-class variance |
| Manual Threshold | User-defined fixed cutoff for pixel segmentation |
| Masking | Used to isolate or remove certain pixel regions from an image |
🧪 Code Summary (Libraries Used)
`skimage.io` and `skimage.color` – to load and process images; `skimage.filters` – for Otsu thresholding; `skimage.exposure` – for histogram generation; `matplotlib.pyplot` – for visualization
02. Geometric Operation in Images¶
Convolution¶
Here, we start by converting the image into grayscale to simplify the data and remove color distractions. This is important because many spatial filters, like edge detection and sharpening, work on intensity values, not color.
Next, we apply a blurring filter using a simple averaging kernel. This kernel takes the average of surrounding pixels, helping us to smooth out the image and reduce noise. Blurring is often used as a preprocessing step before detecting edges.
Then, we apply a sharpening filter. This filter enhances the edges and details in the image by emphasizing the difference between each pixel and its neighbors. The result is an image with clearer boundaries and features — useful in applications like document scanning or medical imaging.
Lastly, we apply a Sobel filter, specifically the vertical edge detector. This operation highlights areas of the image where intensity changes sharply — meaning it shows where objects or textures begin and end. These kinds of edge detectors are critical in many computer vision tasks like object detection, segmentation, or OCR.
All these filters are applied through convolution, where we slide a kernel (a small matrix) across the image, computing a weighted sum at each step. This is the fundamental idea behind many classical and modern image processing techniques, including Convolutional Neural Networks (CNNs).
By comparing the original, blurred, sharpened, and edge-detected images side by side, we get a clearer understanding of how each filter transforms the visual information, and how convolution plays a central role in image understanding.
from skimage import io, color
from scipy import ndimage as ndi
import numpy as np
import matplotlib.pyplot as plt

# === Step 1: Load and Preprocess the Image ===
# CONSISTENCY FIX: use the same data path as every other cell in this
# notebook (the previous "../MATERI/Material/..." path did not match).
img = io.imread("../02_Data/UB_BUILDING.jpg")  # Replace with your own image path
gray = color.rgb2gray(img)  # the filters below operate on intensity, not color

# === Step 2: Define Convolution Kernels ===
# Blur kernel: 3x3 mean filter — each pixel becomes the average of
# itself and its 8 neighbors.
blur_kernel = np.ones((3, 3)) / 9

# Sharpen kernel: classic unsharp-mask style kernel that boosts the
# center pixel relative to its neighborhood, emphasizing edges.
sharpen_kernel = np.array([[ 0, -1,  0],
                           [-1,  5, -1],
                           [ 0, -1,  0]])

# Sobel Gy kernel: responds to intensity changes along the vertical
# axis, i.e. it highlights *horizontal* edges. (The original comment
# called this a vertical-edge detector; that would be the transposed
# kernel, Gx.)
sobel_kernel = np.array([[-1, -2, -1],
                         [ 0,  0,  0],
                         [ 1,  2,  1]])

# === Step 3: Apply Convolution to the Grayscale Image ===
blurred = ndi.convolve(gray, blur_kernel)       # smooth / denoise
sharpened = ndi.convolve(gray, sharpen_kernel)  # enhance detail
edges = ndi.convolve(gray, sobel_kernel)        # edge response

# === Step 4: Display Results for Comparison ===
titles = ["Original", "Blurred", "Sharpened", "Edges (Sobel)"]
images = [gray, blurred, sharpened, edges]

# 2x2 grid: original plus the three filtered versions.
plt.figure(figsize=(14, 8))
for i in range(4):
    plt.subplot(2, 2, i + 1)
    plt.imshow(images[i], cmap='gray')
    plt.title(titles[i])
    plt.axis('off')
plt.tight_layout()
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from skimage.io import imread
from skimage.color import rgb2gray
from skimage.transform import resize, rotate, AffineTransform, warp

# Read the photo and reduce it to a single intensity channel.
image = imread('../02_Data/UB_BUILDING.jpg')  # Replace with your image path
gray_image = rgb2gray(image)

# 1. Downscale to half the original size (anti-aliasing avoids artifacts).
half_shape = (gray_image.shape[0] // 2, gray_image.shape[1] // 2)
resized = resize(gray_image, half_shape, anti_aliasing=True)

# 2. Translate 30 px right and 50 px down; warp() expects the inverse map.
tform = AffineTransform(translation=(30, 50))
shifted = warp(gray_image, tform.inverse)

# 3./4. Mirror left-right and top-bottom.
flipped_horizontal = np.fliplr(gray_image)
flipped_vertical = np.flipud(gray_image)

# 5. Rotate 45 degrees counter-clockwise; resize=True grows the canvas
# so the corners are not clipped.
rotated = rotate(gray_image, angle=45, resize=True)

# One panel per transformation, original first.
panels = [
    (gray_image, 'Original Image'),
    (resized, 'Resized (Half Size)'),
    (shifted, 'Shifted (30, 50)'),
    (flipped_horizontal, 'Flipped Horizontally'),
    (flipped_vertical, 'Flipped Vertically'),
    (rotated, 'Rotated 45 Degrees'),
]
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
for axis, (picture, caption) in zip(axes.ravel(), panels):
    axis.imshow(picture, cmap='gray')
    axis.set_title(caption)
    axis.axis('off')
plt.tight_layout()
plt.show()
🔢 Transformations Overview
| Transformation | Description |
|---|---|
| Resizing | Reducing or enlarging image dimensions. Useful for compression or standardization. |
| Translation | Shifting the image by a specified number of pixels (x, y). Useful in data augmentation or object tracking. |
| Flipping | Mirroring the image. Horizontal = mirror left-right. Vertical = flip top-bottom. |
| Rotation | Rotating the image around its center by a certain angle. Used for augmentation or alignment. |
🪄 Step-by-Step Explanation
✅ 1. Convert to Grayscale We begin with a color (RGB) image and convert it to grayscale for simplification in processing.
gray_image = rgb2gray(image)
✅ 2. Resize Image
We reduce the image size by 50% in both dimensions. This is commonly used in:
- Memory efficiency
- Model input standardization
- Thumbnail generation
resized = resize(gray_image, (height // 2, width // 2))
✅ 3. Translate (Shift) Image
We shift the image 30 pixels right and 50 pixels down using an affine transformation.
tform = AffineTransform(translation=(30, 50))
shifted = warp(gray_image, tform.inverse)
✅ 4. Flip Image
Used in data augmentation to simulate mirrored views.
- Horizontal flip (mirror left-right): `np.fliplr()`
- Vertical flip (mirror top-bottom): `np.flipud()`
✅ 5. Rotate Image
Rotates the image counter-clockwise by 45 degrees. Resize is enabled to prevent clipping.
rotated = rotate(gray_image, angle=45, resize=True)
📊 Visualization
All transformed images are displayed alongside the original to compare results and understand spatial changes.
🧪 Libraries Used
`skimage.io`, `skimage.color`: for loading and grayscale conversion; `skimage.transform`: for resizing, rotating, and shifting; `NumPy`: for flipping and general array operations; `matplotlib.pyplot`: for visualization
03. Spatial Filtering (Convolution)¶
from skimage import io, color
from scipy import ndimage as ndi
import numpy as np
import matplotlib.pyplot as plt

# ---------------------------------------------------------------
# Load the photo and reduce it to grayscale for filtering.
# ---------------------------------------------------------------
img = io.imread("../02_Data/UB_BUILDING.jpg")  # Change path if needed
gray = color.rgb2gray(img)

# ---------------------------------------------------------------
# Kernels: each 3x3 matrix defines one convolution filter.
# ---------------------------------------------------------------
# Mean filter: uniform weights, smooths by local averaging.
blur_kernel = np.ones((3, 3)) / 9

# Unsharp-mask style kernel: boosts the center pixel against its
# neighborhood, accentuating edges and fine detail.
sharpen_kernel = np.array([[ 0, -1,  0],
                           [-1,  5, -1],
                           [ 0, -1,  0]])

# Sobel Gy kernel: measures the intensity gradient along the vertical
# axis (strong response on horizontal edges).
sobel_kernel = np.array([[-1, -2, -1],
                         [ 0,  0,  0],
                         [ 1,  2,  1]])

# ---------------------------------------------------------------
# Convolve the grayscale image with each kernel.
# ---------------------------------------------------------------
blurred = ndi.convolve(gray, blur_kernel)       # smooth noise and detail
sharpened = ndi.convolve(gray, sharpen_kernel)  # enhance edges
edges = ndi.convolve(gray, sobel_kernel)        # edge response

# ---------------------------------------------------------------
# Show the original and the three filtered versions in a 2x2 grid.
# ---------------------------------------------------------------
titles = ["Original", "Blurred", "Sharpened", "Edges (Sobel)"]
images = [gray, blurred, sharpened, edges]

plt.figure(figsize=(14, 8))
for position, (picture, caption) in enumerate(zip(images, titles), start=1):
    plt.subplot(2, 2, position)
    plt.imshow(picture, cmap='gray')
    plt.title(caption)
    plt.axis('off')
plt.tight_layout()
plt.show()
🔍 Why Use Filters?
Convolution filters are fundamental in image processing for tasks such as:
- Smoothing or denoising images
- Enhancing features or edges
- Detecting patterns like edges or textures
🧱 1. Convolution Kernels Used
🔹 A. Blur Kernel (Mean Filter) Smooths the image by averaging neighboring pixels.
blur_kernel = np.ones((3, 3)) / 9
🔹 B. Sharpen Kernel
Highlights edges and details. Often used to make images appear clearer.
sharpen_kernel = np.array([
[ 0, -1, 0],
[-1, 5, -1],
[ 0, -1, 0]
])
🔹 C. Sobel Filter (Vertical)
A specific edge-detection filter that identifies vertical gradients.
sobel_kernel = np.array([
[-1, -2, -1],
[ 0, 0, 0],
[ 1, 2, 1]
])
⚙️ 2. Processing Steps
| Step | Description |
|---|---|
| Load Image | Use skimage.io to read and convert to grayscale. |
| Apply Filters | Use scipy.ndimage.convolve() to apply each kernel. |
| Display Output | Visualize original and filtered results in a grid. |
📊 Visual Comparison
Each of the following is shown:
- Original Grayscale Image
- Blurred Image (Smoothing)
- Sharpened Image (Detail enhancement)
- Edges (Sobel Filter) (Edge detection)
🧪 Output Example
| Filter | Effect Description |
|---|---|
| Blurred | Reduces noise and detail |
| Sharpened | Enhances outlines and contrast |
| Sobel | Highlights vertical edges (e.g., buildings, poles) |
🧠 Key Concepts
- Convolution is a mathematical operation combining two functions (image and kernel) to produce a third function.
- Kernels (also called masks or filters) define the transformation to be applied.
- Convolution is used for both feature extraction and image enhancement.
📚 Libraries Used
`skimage.io`, `skimage.color` — for image loading and grayscale conversion; `scipy.ndimage` — for convolution operations; `matplotlib.pyplot` — for visualization; `NumPy` — for matrix and kernel creation
✅ Suggested Exercises
- Try other edge filters like Prewitt or Roberts.
- Increase kernel size for stronger smoothing effects.
- Combine filters (e.g., blur → sharpen → detect edge).
04 More Filtering¶
from skimage import io, color, filters, restoration
from scipy import ndimage as ndi
import numpy as np
import matplotlib.pyplot as plt
from skimage.util import random_noise

# ---------------------------------------------------------------
# Load the photo and convert to grayscale (most filters expect 2D).
# ---------------------------------------------------------------
img = io.imread("../02_Data/UB_BUILDING.jpg")  # Adjust path as needed
gray = color.rgb2gray(img)

# ---------------------------------------------------------------
# Corrupt a copy with salt-and-pepper (impulse) noise so the
# median filter below has something to clean up.
# ---------------------------------------------------------------
noisy_image = random_noise(gray, mode='s&p', amount=0.05)

# ---------------------------------------------------------------
# Smoothing filters.
# ---------------------------------------------------------------
mean_kernel = np.ones((3, 3)) / 9                    # uniform 3x3 average
mean_filtered = ndi.convolve(gray, mean_kernel)
gaussian_filtered = filters.gaussian(gray, sigma=1)  # distance-weighted, more natural than mean

# ---------------------------------------------------------------
# Sharpening / detail filters.
# ---------------------------------------------------------------
laplacian_filtered = ndi.laplace(gray)  # 2nd derivative: highlights rapid intensity changes
high_pass = gray - gaussian_filtered    # original minus its low-frequency content

# ---------------------------------------------------------------
# Median filter: replaces each pixel with the neighborhood median,
# very effective against impulse noise.
# ---------------------------------------------------------------
median_filtered = filters.median(noisy_image)

# ---------------------------------------------------------------
# Show everything in one grid.
# ---------------------------------------------------------------
panels = [
    (gray, "Original"),
    (gaussian_filtered, "Gaussian Filter"),
    (laplacian_filtered, "Laplacian (Sharpen)"),
    (high_pass, "High-Pass Filter"),
    (noisy_image, "Noisy Image (Salt & Pepper)"),
    (median_filtered, "Median Filter"),
]
plt.figure(figsize=(18, 10))
for position, (picture, caption) in enumerate(panels, start=1):
    plt.subplot(2, 4, position)
    plt.imshow(picture, cmap='gray')
    plt.title(caption)
    plt.axis('off')
plt.tight_layout()
plt.show()
🖼 Filters Demonstrated:
🔹 1. Gaussian Filter (Smoothing)
- A weighted average that gives higher importance to nearby pixels.
- Preserves general structure while reducing noise.
filters.gaussian(gray, sigma=1)
🔹 2. Laplacian Filter (Sharpening)
- Second derivative filter used to detect edges.
- Highlights areas of rapid intensity change.
ndi.laplace(gray)
🔹 3. High-Pass Filter
- Subtracts a smoothed (low-frequency) version from the original.
- Retains high-frequency details such as edges and textures.
high_pass = gray - gaussian_filtered
🔹 4. Salt-and-Pepper Noise
- Random black and white pixels simulate sensor errors.
random_noise(gray, mode='s&p', amount=0.05)
🔹 5. Median Filter
- Replaces each pixel with the median of its neighborhood.
- Highly effective at removing impulse (salt-and-pepper) noise.
filters.median(noisy_image)
📊 Output Overview:
| Filter | Effect |
|---|---|
| Original | Clean grayscale image |
| Gaussian Filter | Smooths image while preserving edges |
| Laplacian Filter | Highlights transitions and edges |
| High-Pass Filter | Isolates fine detail and texture |
| Salt-and-Pepper Noise | Simulates camera/sensor noise |
| Median Filter | Denoises impulse noise without blurring edges |
📚 Libraries Used:
`skimage.io`, `skimage.color`, `skimage.filters`, `skimage.util` — for image manipulation; `scipy.ndimage` — for convolution operations; `matplotlib.pyplot` — for visualizing outputs
🧠 Key Concepts:
- Low-pass filters (mean, Gaussian) reduce noise but may blur edges.
- High-pass filters (Laplacian, high-pass) enhance details but may amplify noise.
- Median filtering is especially effective for removing salt-and-pepper noise.
✅ Suggested Exercises:
- Compare median vs Gaussian filtering on noisy images.
- Apply filters to color images (per channel).
- Test different `sigma` values for Gaussian smoothing.
05. [BONUS] Morphological Operation¶
import numpy as np
import matplotlib.pyplot as plt
from skimage.morphology import dilation, erosion, square
from skimage.io import imread
from skimage.color import rgb2gray
from skimage.filters import threshold_otsu

# Read the photo and collapse it to grayscale.
image = imread('../02_Data/UB_BUILDING.jpg')  # update path accordingly
gray_image = rgb2gray(image)

# Binarize with Otsu's automatically chosen threshold: pixels above it
# become foreground (True), everything else background (False).
thresh = threshold_otsu(gray_image)
binary_image = gray_image > thresh

# A 3x3 square structuring element drives both morphological operations.
selem = square(3)

# Dilation grows foreground regions (fills small holes and gaps);
# erosion shrinks them (removes specks, separates touching blobs).
dilated = dilation(binary_image, selem)
eroded = erosion(binary_image, selem)

# Show each processing stage side by side.
stages = [
    (gray_image, 'Grayscale Image'),
    (binary_image, 'Binary Image'),
    (dilated, 'Dilated Image'),
    (eroded, 'Eroded Image'),
]
fig, axes = plt.subplots(1, 4, figsize=(12, 4))
for axis, (picture, caption) in zip(axes, stages):
    axis.imshow(picture, cmap='gray')
    axis.set_title(caption)
    axis.axis('off')
plt.tight_layout()
plt.show()
Key Morphological Operations
- Dilation
- Expands or grows the foreground (white) regions in a binary image.
- Useful for closing small holes or gaps.
- The extent of growth depends on the structuring element (SE) used.
- Erosion
- Shrinks or erodes the foreground regions.
- Helps remove small noise and detach objects that are close to each other.
Workflow Demonstrated in Code
- Load an RGB image and convert it to grayscale.
- Apply Otsu's method to compute a threshold.
- Generate a binary image by thresholding.
- Define a structuring element (3x3 square).
- Apply dilation to grow foreground objects.
- Apply erosion to shrink foreground objects.
- Visualize all steps side-by-side.
Effects of Morphological Operations
| Operation | Effect on Foreground | Typical Use Cases |
|---|---|---|
| Dilation | Expands | Close gaps, fill holes |
| Erosion | Shrinks | Remove noise, separate connected objects |
Additional Concepts
- Opening: Erosion followed by dilation. Useful to remove noise while preserving object shape.
- Closing: Dilation followed by erosion. Useful for filling small holes and connecting nearby objects.
Together, these morphological operations support image-analysis tasks such as segmentation, feature extraction, and noise reduction.
06. [BONUS] Compression using Fourier Transformation¶
Fourier Transformation
The Fourier Transform is a mathematical technique that transforms a signal (in our case, an image) from the spatial domain (pixels) to the frequency domain.
💡 Intuition
- In the spatial domain: images are made up of pixels with varying intensity.
- In the frequency domain: the image is represented by a combination of sinusoidal patterns of various frequencies and orientations.
🔁 Why Use FT in Image Processing?
- To analyze textures, patterns, or repeated structures.
- To filter images (e.g., remove noise).
- To compress images by keeping only important frequency components.
🔬 2. Discrete Fourier Transform (DFT) for Images
In practice, we use the 2D Discrete Fourier Transform (DFT), efficiently computed using Fast Fourier Transform (FFT).
`np.fft.fft2()` computes the 2D FFT; `np.fft.fftshift()` centers the zero-frequency component (for better visualization); `np.abs()` gives the magnitude spectrum.
import numpy as np
import matplotlib.pyplot as plt
from skimage import io, color

# Read the photo and convert it to a single-channel grayscale array.
image = io.imread("../02_Data/UB_BUILDING.jpg")  # Replace with your path
gray = color.rgb2gray(image)

# Move to the frequency domain: compute the 2D FFT, then center the
# zero-frequency term so low frequencies sit in the middle of the
# spectrum (easier to visualize and to mask).
f_transform = np.fft.fft2(gray)
f_shifted = np.fft.fftshift(f_transform)
# Function to compress the FFT by zeroing out low magnitude frequency components
def compress_fft(f_shifted, keep_ratio=0.1):
    """Zero out all but the largest-magnitude FFT coefficients.

    Keeps only the top ``keep_ratio`` *fraction* (by magnitude) of the
    frequency components; everything below the resulting magnitude
    threshold is set to zero. This is the lossy step of the compression.

    Parameters:
        f_shifted (ndarray): FFT-shifted (centered) frequency-domain data.
        keep_ratio (float): Fraction of components to keep, in [0, 1]
            (e.g. 0.1 keeps the strongest 10%).

    Returns:
        compressed_fft (ndarray): ``f_shifted`` with low-magnitude
            components zeroed out.
        mask (ndarray): Boolean mask of kept components.

    Raises:
        ValueError: If ``keep_ratio`` is outside [0, 1].
    """
    # Validate early: an out-of-range ratio would otherwise surface as a
    # cryptic numpy percentile error (percentile outside [0, 100]).
    if not 0.0 <= keep_ratio <= 1.0:
        raise ValueError(f"keep_ratio must be in [0, 1], got {keep_ratio}")
    magnitude = np.abs(f_shifted)  # Magnitude of complex FFT coefficients
    # The threshold sits at the (100 - keep_ratio*100)-th percentile, so
    # only the strongest keep_ratio fraction of coefficients survives.
    threshold = np.percentile(magnitude, 100 - keep_ratio * 100)
    mask = magnitude > threshold  # True where a component is kept
    return f_shifted * mask, mask
# Keep only the strongest 10% of frequency components
compressed_fft, mask = compress_fft(f_shifted, keep_ratio=0.1)

# Undo the center-shift, invert the FFT to return to the spatial domain,
# and take the magnitude to drop the tiny imaginary residue left by
# floating-point error.
reconstructed = np.abs(np.fft.ifft2(np.fft.ifftshift(compressed_fft)))

# Three-panel figure: original image, kept-frequency mask, reconstruction.
panels = [
    (gray, "Original Grayscale Image"),
    (mask, "Frequency Mask (10% Kept)"),
    (reconstructed, "Reconstructed (Compressed) Image"),
]
plt.figure(figsize=(15, 5))
for position, (panel, caption) in enumerate(panels, start=1):
    plt.subplot(1, 3, position)
    plt.imshow(panel, cmap='gray')
    plt.title(caption)
    plt.axis('off')
plt.tight_layout()
plt.show()
Key Concepts
- Frequency Domain Compression
- Images can be represented by their frequency components using FFT.
- Many frequency components have very low magnitude and contribute little visually.
- By discarding low magnitude components, we can compress the image data.
- Thresholding Frequency Components
- We select a percentage (e.g., top 10%) of the highest magnitude components.
- Components below this threshold are set to zero.
- This process reduces the amount of frequency data used to reconstruct the image.
- Image Reconstruction
- The inverse FFT converts the compressed frequency data back into the spatial domain.
- Resulting image is an approximation of the original.
- Compression level controls the trade-off between image quality and data reduction.
Detailed Explanation
| Step | Description |
|---|---|
| FFT Computation | Converts image to frequency domain |
| Magnitude Threshold | Identify top frequency components to keep |
| Masking | Zero out low magnitude frequencies |
| Inverse FFT | Reconstruct image from compressed frequency components |
Visual Interpretation
- Original Image: The source grayscale image.
- Frequency Mask: Binary image showing which frequency components are kept.
- Reconstructed Image: Image after compression, showing quality loss if many frequencies are removed.
Practical Applications
- Image compression formats like JPEG use similar frequency domain techniques.
- Reducing data size for transmission or storage.
- Feature extraction by emphasizing significant frequencies.
- Noise reduction by filtering out high frequency noise components.
Summary
Frequency domain compression leverages the fact that many images have sparse significant frequency components. By keeping only these, we reduce data size while retaining most visual information. The FFT and its inverse provide a powerful framework for image analysis and compression.
07. [BONUS] Simple segmentation using Watershed¶
import matplotlib.pyplot as plt
from skimage import data, filters, segmentation, morphology, color
from scipy import ndimage as ndi
import numpy as np
# Built-in sample: grayscale photo of several touching coins
image = data.coins()

# Edge strength via the Sobel gradient; watershed "floods" up to these ridges
gradient = filters.sobel(image)

# Seed markers: label pixels we are confident about and leave the rest
# (label 0) for the flooding process to decide.
markers = np.zeros_like(image)
markers[image < 30] = 1   # very dark pixels -> background seed
markers[image > 150] = 2  # very bright pixels -> coin (foreground) seed

# Grow regions from the seeds, guided by the gradient, until they meet
ws_labels = segmentation.watershed(gradient, markers)

# Region boundaries of the labeled result (computed for inspection)
boundaries = segmentation.find_boundaries(ws_labels)

# Three-panel figure: input, gradient, and colorized segmentation overlay
views = [
    ('Original Image', image, 'gray'),
    ('Gradient Image', gradient, 'gray'),
    ('Watershed Segmentation', color.label2rgb(ws_labels, image=image), None),
]
plt.figure(figsize=(12, 6))
for position, (caption, view, colormap) in enumerate(views, start=1):
    plt.subplot(1, 3, position)
    plt.title(caption)
    plt.imshow(view, cmap=colormap)
    plt.axis('off')
plt.tight_layout()
plt.show()
Watershed Segmentation using Sobel Gradient and Markers
Overview
This example demonstrates how to perform image segmentation using the watershed algorithm from the skimage library. The watershed method uses gradient information and predefined markers to separate foreground objects from the background, particularly useful for segmenting touching or overlapping objects such as coins.
Explanation
- Loading the Sample Image
image = data.coins()
- The sample image
`coins` from `skimage.data` is a grayscale image with multiple coins touching each other, making it a good candidate for segmentation.
- Computing the Gradient
gradient = filters.sobel(image)
- The Sobel filter detects edges by computing the gradient magnitude of the image.
- Edges appear as high gradient values and are essential for watershed segmentation.
- Creating Markers for Watershed
markers = np.zeros_like(image)
markers[image < 30] = 1  # Background marker
markers[image > 150] = 2  # Foreground marker (coins)
- Markers serve as seed points for the watershed algorithm.
- Pixels with intensity less than 30 are marked as background (label 1).
- Pixels with intensity greater than 150 are marked as foreground (label 2).
- The watershed algorithm grows regions from these markers guided by the gradient.
- Applying Watershed Segmentation
ws_labels = segmentation.watershed(gradient, markers)
- Watershed segments the image by "flooding" from the markers until boundaries meet, following gradient information.
- The result is a labeled image where each segment has a unique integer label.
- Visualizing Results
plt.imshow(color.label2rgb(ws_labels, image=image))
`label2rgb` overlays the segmentation labels as colors on the original image for better visualization. - Boundaries of the segmented regions are clearly visible.
Key Concepts
- Gradient: Measures how intensity changes at each pixel; edges have high gradients.
- Markers: Predefined regions that indicate known areas of the background and foreground.
- Watershed Algorithm: Treats the image like a topographic surface and floods from markers to segment regions.
When to Use Watershed
- Segmenting touching or overlapping objects.
- When you can define markers for background and foreground.
- Images with good contrast between objects and background.
Additional Notes
- Marker creation is critical; poor markers lead to poor segmentation.
- You can improve segmentation by refining markers with morphological operations or using distance transforms.
- The watershed can be sensitive to noise; smoothing or preprocessing may help.
Summary
This example shows a simple but effective use of watershed segmentation with Sobel gradients and manually defined markers to segment coins in a grayscale image. It highlights the importance of gradient-based edges and seed markers for watershed to correctly separate objects.
08. [BONUS] SLIC Segmentation¶
import matplotlib.pyplot as plt
from skimage import io, segmentation, color
from skimage.color import rgb2lab
# Read the color image we want to segment
image = io.imread('../02_Data/UB_BUILDING2.jpg')

# Work in LAB space: Euclidean distance there tracks perceived color
# difference better than in RGB, which improves SLIC's clustering.
lab_image = rgb2lab(image)

# Partition the image into roughly 50 superpixels. compactness=10
# balances color similarity against spatial regularity of the segments;
# start_label=1 keeps label 0 free (clearer for visualization).
segments = segmentation.slic(
    lab_image,
    n_segments=50,
    compactness=10,
    start_label=1,
)

# Paint every superpixel with its average color and display the result
segmented_image = color.label2rgb(segments, image=image, kind='avg')
plt.figure(figsize=(10, 6))
plt.imshow(segmented_image)
plt.title('SLIC Superpixel Segmentation')
plt.axis('off')
plt.show()
SLIC Superpixel Segmentation using LAB Color Space
Overview
This example demonstrates how to segment an image using SLIC (Simple Linear Iterative Clustering) superpixels. Superpixels group pixels into perceptually meaningful atomic regions, reducing complexity and enabling efficient analysis in subsequent tasks such as object recognition or tracking.
Explanation
- Loading the Image
image = io.imread('../02_Data/UB_BUILDING2.jpg')
- Loads a color image using `skimage.io`.
- Replace the path with your own image if necessary.
- Converting RGB to LAB (Optional but Recommended)
lab_image = rgb2lab(image)
- LAB color space is perceptually uniform, meaning Euclidean distances better reflect perceptual differences than in RGB.
- Improves superpixel quality, especially in natural images with varying lighting.
- Applying SLIC Superpixel Segmentation
segments = segmentation.slic(
lab_image,
n_segments=50,
compactness=10,
start_label=1
)
`slic` segments the image into approximately `n_segments` superpixels. `compactness` balances color similarity and spatial proximity — higher values make superpixels more square/compact. `start_label=1` ensures labels start from 1, not 0 (useful for visualization).
- Visualizing the Result
segmented_image = color.label2rgb(segments, image=image, kind='avg')
`label2rgb(..., kind='avg')` overlays the average color of each superpixel onto the original image. - Gives a visually intuitive representation of segmentation.
Key Concepts
Superpixels: Groups of connected pixels with similar color and texture, preserving important boundaries.
SLIC Algorithm:
- Clusters pixels in a combined 5D space (color + spatial).
- Iterative refinement ensures high boundary adherence and spatial compactness.
LAB Color Space:
- Perceptually uniform.
- Better for color clustering than RGB.
Parameters in SLIC
| Parameter | Description |
|---|---|
n_segments |
Approximate number of superpixels |
compactness |
Trade-off between color similarity and spatial proximity (default = 10) |
start_label |
First label index assigned to segments (1 is often clearer for visualization) |
Applications
- Image segmentation and preprocessing
- Region-based object detection
- Texture and color-based analysis
- Efficient feature extraction in large images
Summary
This code demonstrates how to:
- Load and preprocess a color image.
- Apply SLIC superpixel segmentation (optionally in LAB space).
- Visualize the result using average color overlays.
Using LAB space with SLIC helps produce cleaner, more natural-looking segments — especially useful in real-world scenes.
09. Clustering Image using K-Means¶
import numpy as np
import matplotlib.pyplot as plt
from skimage import io
from sklearn.cluster import KMeans
# --- Load the image and flatten it to an (N, 3) array of RGB pixels ---
image = io.imread('../02_Data/UB_BUILDING2.jpg')
pixels = image.reshape(-1, 3)

# Fixed, distinct display colors (up to 7 clusters) so that segments
# stay visually comparable across different values of k
cluster_colors = np.array([
    [255, 0, 0],     # Red
    [0, 255, 0],     # Green
    [0, 0, 255],     # Blue
    [255, 255, 0],   # Yellow
    [0, 255, 255],   # Cyan
    [255, 0, 255],   # Magenta
    [255, 165, 0],   # Orange
], dtype=np.uint8)

# Cluster counts to compare side by side
cluster_list = [3, 5, 7]

# 2x2 grid: the original image plus one panel per value of k
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes = axes.flatten()

# Plot original image in the first cell
axes[0].imshow(image)
axes[0].set_title("Original Image")
axes[0].axis('off')

# Apply KMeans clustering for each k and plot results
for i, k in enumerate(cluster_list):
    # n_init=10 pins the pre-1.4 sklearn default explicitly: results stay
    # identical across versions and the FutureWarning is silenced.
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
    kmeans.fit(pixels)
    labels = kmeans.labels_

    # Map each cluster label to its fixed color; the modulo guards the
    # lookup if k ever exceeds the palette size.
    colored_pixels = cluster_colors[labels % len(cluster_colors)]
    segmented_img = colored_pixels.reshape(image.shape)

    axes[i + 1].imshow(segmented_img)
    axes[i + 1].set_title(f'{k} Clusters')
    axes[i + 1].axis('off')

# Hide any grid cells beyond the panels actually drawn (if total < 4)
for j in range(len(cluster_list) + 1, 4):
    axes[j].axis('off')

plt.tight_layout()
plt.show()
C:\Users\achmf\.conda\envs\mlenv\lib\site-packages\sklearn\cluster\_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning super()._check_params_vs_input(X, default_n_init=10) C:\Users\achmf\.conda\envs\mlenv\lib\site-packages\sklearn\cluster\_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning super()._check_params_vs_input(X, default_n_init=10) C:\Users\achmf\.conda\envs\mlenv\lib\site-packages\sklearn\cluster\_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning super()._check_params_vs_input(X, default_n_init=10)
K-Means Clustering for Image Segmentation with Fixed Colors
Overview
This example demonstrates how to segment an image using K-Means clustering applied to its color pixels. Each cluster is assigned a predefined fixed color, allowing for a clear visual comparison of segmentation with different numbers of clusters (k = 3, 5, 7).
This approach reduces image complexity by grouping similar color regions, which can be useful for scene simplification, preprocessing, or classification.
Explanation
- Image Loading and Preprocessing
image = io.imread(...)
pixels = image.reshape(-1, 3)
- The image is loaded and reshaped into a 2D array where each row is an RGB pixel.
- This format is suitable for applying clustering to color features.
- Predefining Colors for Clusters
cluster_colors = np.array([...])
- Each cluster is assigned a fixed color (e.g. red, green, blue, etc.).
- Helps visualize segmentation consistently across different values of
k.
- Looping Over Different Cluster Numbers
for k in [3, 5, 7]:
...
- For each
`k`, K-Means groups the pixels into `k` clusters based on RGB similarity. - The resulting label for each pixel is mapped to a predefined color using modular indexing (`labels % len(cluster_colors)`).
- Visualization
- Original image is shown alongside segmented images for comparison.
- Subplots are arranged in a 2×2 grid with
matplotlib.
Key Concepts
K-Means Clustering:
- Groups data points (pixels) based on similarity (in RGB space here).
- Objective: minimize intra-cluster distance.
Image Segmentation:
- Simplifies the image by reducing color variance while preserving regions.
- Useful for object recognition, compression, and visual abstraction.
Fixed Color Mapping:
- Using a consistent color palette improves interpretability.
- Especially useful for comparing multiple cluster sizes.
Parameters to Tune
| Parameter | Description |
|---|---|
n_clusters |
Number of color segments (k). Higher k means more details. |
random_state |
Seed for reproducibility of results |
cluster_colors |
Predefined palette to label each segment |
Applications
- Color Quantization: Reducing image colors for compression.
- Scene Simplification: Preprocessing for object detection and tracking.
- Thematic Mapping: Classifying land cover in satellite imagery.
- Style Transfer / Image Abstraction: Creating stylized versions of real scenes.
Summary
This example shows how to:
- Use K-Means clustering for color-based segmentation.
- Map clustered pixels to a fixed color set.
- Compare the effect of different numbers of clusters (
k) visually.
10. [BONUS] PERFORM PCA FOR THE RGB Data¶
import numpy as np
import matplotlib.pyplot as plt
from skimage import io
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
# --- Load image and flatten into (N, 3) RGB feature rows ---
image = io.imread('../02_Data/UB_BUILDING2.jpg')
pixels = image.reshape(-1, 3)
h, w, _ = image.shape
k = 5

# Fixed display palette: one distinct color per cluster
cluster_colors = np.array([
    [255, 0, 0],     # Red
    [0, 255, 0],     # Green
    [0, 0, 255],     # Blue
    [255, 255, 0],   # Yellow
    [0, 255, 255],   # Cyan
], dtype=np.uint8)

# --- KMeans directly on raw RGB values ---
# n_init=10 pins the pre-1.4 sklearn default explicitly: identical
# behavior across versions, and the FutureWarning is silenced.
kmeans_rgb = KMeans(n_clusters=k, random_state=42, n_init=10)
labels_rgb = kmeans_rgb.fit_predict(pixels).reshape(h, w)
segmented_rgb = cluster_colors[labels_rgb % len(cluster_colors)]

# --- KMeans on a 2-component PCA projection of the same pixels ---
pca = PCA(n_components=2)
pixels_pca = pca.fit_transform(pixels)
kmeans_pca = KMeans(n_clusters=k, random_state=42, n_init=10)
labels_pca = kmeans_pca.fit_predict(pixels_pca).reshape(h, w)
segmented_pca = cluster_colors[labels_pca % len(cluster_colors)]

# --- Side-by-side comparison in a 2x2 grid ---
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes = axes.flatten()

axes[0].imshow(image)
axes[0].set_title("1. Original Image")
axes[0].axis('off')

axes[1].imshow(segmented_rgb)
axes[1].set_title("2. KMeans (RGB Features)")
axes[1].axis('off')

axes[2].imshow(segmented_pca)
axes[2].set_title("3. KMeans (PCA Features)")
axes[2].axis('off')

axes[3].axis('off')  # leave blank for layout balance

plt.tight_layout()
plt.show()
C:\Users\achmf\.conda\envs\mlenv\lib\site-packages\sklearn\cluster\_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning super()._check_params_vs_input(X, default_n_init=10) C:\Users\achmf\.conda\envs\mlenv\lib\site-packages\sklearn\cluster\_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning super()._check_params_vs_input(X, default_n_init=10)